#!/usr/bin/perl -w

# -----------------------------------------------------------------------------
# check_bandwidth 2.0.0rc1
#    ~ Nagios(r) SNMP Network Traffic Monitor Plugin
#    ~ Copyright 2008, Jonathan Wright <jonathan (at) jabwebsolutions.co.uk>
#    ~ based on the check_traffic plugin by Adrian Weiczorek
#      and check_snmp_cisco_ifstatus by Altinity Limited
# -----------------------------------------------------------------------------
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307
# -----------------------------------------------------------------------------
# Note: Nagios is a registered trademark of Ethan Galstad. 

# setup Perl
use strict;
use diagnostics;
use warnings;

# import modules
use Net::SNMP::Interfaces;

# -----------------------------------------------------------------------------
# define the variables we're going to need and then fill them
our (%_status, %_options, $_cycle);

# Exit codes to return upon the various conditions. Nagios(r) plugins report
#  UNKNOWN with exit code 3; a negative value would wrap to 255 on exit and
#  be flagged by Nagios as a return code that is out of bounds.
%_status = (
  'OK'       => 0,
  'WARNING'  => 1,
  'CRITICAL' => 2,
  'UNKNOWN'  => 3
);

# command-line controlled options (including defaults)
%_options = (
  'version'       => '2c',
  'interface'     => '',
  'hostname'      => '',
  'community'     => 'public',
  'port'          => 161,
  'timeout'       => 15,
  'warning'       => 75,
  'critical'      => 90,
  'on-the-fly'    => 0,
  'check-up'      => 0,
  'pause'         => 10,
  'use-bytes'     => 0,
  'use-mega'      => 0,
  'default-speed' => 10000,
  'override'      => 0,
  'down-warning'  => 0
);

# set the location to save the temporary data to: one default for Windows and
#  another for everything else -- can be overridden later with --save
$_options{'save'} = ($^O =~ /^MSWin/ ? 'C:\Windows\Temp' : '/tmp/.traffic');

# the point at which a counter cycles back to zero, starting with the value
#  for a 32-bit counter; __get_count() raises this when the interface
#  provides the 64-bit (HC) counters
$_cycle = 2**32;

# -----------------------------------------------------------------------------
# start the program by processing the command-line options
#
# Every option is matched on its exact short (-x) or long (--name) spelling.
#  Options in %value_opt consume the argument that follows them; options in
#  %flag_opt simply switch their setting on.
{
  my %value_opt = (
    '-H' => 'hostname',  '--hostname'  => 'hostname',
    '-C' => 'community', '--community' => 'community',
    '-P' => 'port',      '--port'      => 'port',
    '-i' => 'interface', '--interface' => 'interface',
    '-o' => 'override',  '--override'  => 'override',
    '-w' => 'warning',   '--warning'   => 'warning',
    '-c' => 'critical',  '--critical'  => 'critical',
    '-t' => 'timeout',   '--timeout'   => 'timeout',
    '-p' => 'pause',     '--pause'     => 'pause',
    '-s' => 'save',      '--save'      => 'save'
  );
  my %flag_opt = (
    '-f' => 'on-the-fly',   '--on-the-fly'   => 'on-the-fly',
    '-u' => 'check-up',     '--check-up'     => 'check-up',
    '-d' => 'down-warning', '--down-warning' => 'down-warning',
    '-b' => 'use-bytes',    '--use-bytes'    => 'use-bytes',
    '-m' => 'use-mega',     '--use-mega'     => 'use-mega'
  );

  # test for definedness rather than truth, so that an argument of '0' or ''
  #  is reported as unknown instead of silently ending the parsing
  while (defined(my $arg = shift @ARGV)) {
    if (exists $value_opt{$arg}) {
      my $key   = $value_opt{$arg};
      my $value = shift @ARGV;
      # the option was the last thing on the command line
      __issue('No value given for '.$arg) unless (defined $value);
      # the limit suffixes (K, M, G...) are matched in upper-case later on
      $value = uc($value) if ($key eq 'warning' || $key eq 'critical');
      $_options{$key} = $value;
    }
    elsif (exists $flag_opt{$arg}) {
      $_options{$flag_opt{$arg}} = 1;
    }
    elsif ($arg eq '-h' || $arg eq '--help') {
      # show the full help text (this exits the program)
      __usage(1);
    }
    else {
      # if argument is unknown, then output error and exit
      __issue('Unknown option: '.$arg);
    }
  }
}

# before we use the variables/options from the command line, we need to make
#  sure they are in the correct format and are usable first. The checks run
#  in a fixed order so the same bad command line always gives the same error.

# options which must have been given a value
foreach my $key ('hostname', 'interface') {
  __issue('No value given for --'.$key)
    unless (defined($_options{$key}) && $_options{$key} ne '');
}

# the hostname is one or more dot-separated labels, which covers dotted IPv4
#  addresses, fully-qualified names and single-label names like 'localhost'
__issue('Invalid hostname given ('.$_options{'hostname'}.')')
  unless (lc($_options{'hostname'}) =~ /^[a-z0-9_-]+(\.[a-z0-9_-]+)*$/);

# limits are a number with an optional suffix, optionally given as a pair
foreach my $key ('warning', 'critical') {
  __issue('Invalid value given for --'.$key)
    unless (defined($_options{$key}) &&
            $_options{$key} =~ /^[0-9]+[BKMGEP]?(,[0-9]+[BKMGEP]?)?$/);
}

# options which must be plain non-negative integers
foreach my $key ('override', 'timeout', 'pause', 'port') {
  __issue('Invalid value given for --'.$key)
    unless (defined($_options{$key}) && $_options{$key} =~ /^[0-9]+$/);
}

# -----------------------------------------------------------------------------
our ($interface, $cache, %count, %recount);

# start the program by getting the interface requested from the SNMP agent
$interface = __get_interface($_options{'interface'});

# check to see if the interface is up before processing bandwidth usage
__check_up($interface) if ($_options{'check-up'});

# the cache holds one file per interface, grouped in a directory per host
$cache = $_options{'save'}.'/'.$_options{'hostname'}.'/'.$interface->index();

# a fresh baseline sample is needed unless a usable cached one gets loaded
my $need_baseline = 1;

# unless on-the-fly requested, try to find and open the cache file for the
#  interface, retrieving the previous values
if (!$_options{'on-the-fly'} && -e $cache) {
  open (my $cache_fh, '<', $cache) or
    __unknown('Cannot open interface cache file for reading ('.$cache.')');
  while (my $line = <$cache_fh>) {
    chomp $line;
    # each line is 'key:value'; split on the first colon only so a value
    #  which itself contains a colon is kept whole
    my ($key, $value) = split(/:/, $line, 2);
    next unless (defined($key) && defined($value));
    $count{$key} = $value;
  }
  close($cache_fh);

  # every field used below must be present before it is compared
  my $usable = 1;
  foreach my $field ('ifSpeed', 'ifReceived', 'ifTransmitted', 'ifTimeStamp') {
    $usable = 0 unless (defined($count{$field}) && $count{$field} ne '');
  }
  # make sure we have values for speed, tx and rx
  $usable &&= (($count{'ifSpeed'} > 0) && ($count{'ifReceived'} > 0) &&
               ($count{'ifTransmitted'} > 0));
  # make sure that there is no chance that the counter has cycled around
  #  (i.e. the counter hasn't cycled based on the speed of the interface)
  #  TODO: this may be too short - look into coding comparison between
  #        this and a minimum period the data should be valid for (10m)
  $usable &&= ((time - $count{'ifTimeStamp'}) < ($_cycle/($count{'ifSpeed'}/8)));

  $need_baseline = ($usable ? 0 : 1);
}

if ($need_baseline) {
  # fetch the initial set of results from the device before sleeping for the
  #  allotted time, so the second sample has something to be compared with
  %count = __get_count($interface);
  sleep($_options{'pause'});
}

# fetch the next set of values from the device which will be calculated
#  against the original set to work out bandwidth
%recount = __get_count($interface);

#print ' -> %count'."\n";
#foreach my $key (keys %count) {
#  print ' --> '.$key.' => '.$count{$key}."\n" }
#print ' -> %recount'."\n";
#foreach my $key (keys %recount) {
#  print ' --> '.$key.' => '.$recount{$key}."\n" }

# save the data from %recount above back into the cache file, but first check
#  that the required directories exist so that we can write the file
unless ($_options{'on-the-fly'}) {
  my $host_dir = $_options{'save'}.'/'.$_options{'hostname'};

  # check (and make) the main root directory for the storage; the final -d
  #  test accepts a directory created by another check in the meantime
  (-e $_options{'save'}) or mkdir($_options{'save'}) or (-d $_options{'save'})
    or __unknown('Cannot create storage directory');
  # check (and make) the hostname directory for the interface files
  (-e $host_dir) or mkdir($host_dir) or (-d $host_dir)
    or __unknown('Cannot create storage directory for hostname');

  # save the data, one file per interface, as 'key:value' lines
  open (my $cache_fh, '>', $cache) or
    __unknown('Cannot open interface cache file for writing ('.$cache.')');
  foreach my $key (sort keys %recount) {
    my $value = (defined($recount{$key}) ? $recount{$key} : '');
    print {$cache_fh} $key.':'.$value."\n";
  }
  # buffered write errors (e.g. a full disk) only surface when closing
  close($cache_fh) or
    __unknown('Cannot write interface cache file ('.$cache.')');
}

# register variables we're going to use in the next section
our($tx, $rx, $report, $status);

# calculate the bandwidth used (data transferred/time), but to prevent negative
#  transfers (and associated issues with limits), if the newer recount value is
#  less than the cached value, the counter has cycled. Therefore, add the
#  maximum possible value (at which the counter cycled) to the newer records
$recount{'ifReceived'} += $_cycle
  if ($recount{'ifReceived'} < $count{'ifReceived'});
$recount{'ifTransmitted'} += $_cycle
  if ($recount{'ifTransmitted'} < $count{'ifTransmitted'});

# the two samples must be at least a second apart, otherwise the rate
#  calculation below would divide by zero
my $elapsed = $recount{'ifTimeStamp'} - $count{'ifTimeStamp'};
__unknown('No time elapsed between the two samples -- cannot calculate bandwidth')
  unless ($elapsed > 0);

# the amount of data moved between the samples; the counters are in bytes, so
#  convert to bits as all internal data and calculations are done in bits
my $sent_bits     = 8*($recount{'ifTransmitted'} - $count{'ifTransmitted'});
my $received_bits = 8*($recount{'ifReceived'} - $count{'ifReceived'});

# calculate bandwidth used, in bits per second
$rx = $received_bits / $elapsed;
$tx = $sent_bits / $elapsed;

# test the values calculated against the warning and critical limits
#  given and therefore work out which status should be reported to Nagios.
#  First, register the variables we're going to be using
our (%levels, %limits);

# To minimise duplication of code, create a hash table with the keys as the
#  levels, pointing to the sub-routines which will report the status back
#  to Nagios(r)
%levels = (
  'critical' => \&__critical,
  'warning'  => \&__warning
);

# hash keys come back in no particular order, so the order in which the levels
#  are tested is spelt out: CRITICAL first, as it outranks WARNING and must
#  win when both limits have been passed
my @level_order = ('critical', 'warning');

# break down the limits into the tx/rx values, checking if we have two
#  separate values for the limits on each type
foreach my $key (@level_order) {
  # a pair is given as 'tx,rx'; a single value applies to both directions
  ($limits{$key}{'tx'}, $limits{$key}{'rx'}) =
    ($_options{$key} =~ /^[0-9]+[BKMGEP]?,[0-9]+[BKMGEP]?$/ ?
     split(',', $_options{$key}) : ($_options{$key}, $_options{$key}));

  # if the values have no suffix (i.e. K, M or G), then they're going to be
  #  %age values - re-set the limit values to %age of available bandwidth
  $limits{$key}{'tx'} = $recount{'ifSpeed'}*($limits{$key}{'tx'}/100)
    if ($limits{$key}{'tx'} =~ /^[0-9]+$/);
  $limits{$key}{'rx'} = $recount{'ifSpeed'}*($limits{$key}{'rx'}/100)
    if ($limits{$key}{'rx'} =~ /^[0-9]+$/);
}

# create the text report which we're going to send back to Nagios(r) (and can be
#  read by the admin via the site or via a notice). The performance data pairs
#  each direction with its own limits: 'out' is the transmit rate with the tx
#  limits, 'in' is the receive rate with the rx limits.
$report = sprintf(
  # Nagios(r) Plugin Report: 'Status Information|Performance Data'
  'Out: %1$sps; In: %3$sps (Sent %5$s, Received %6$s in %7$s seconds)|out=%2$s;%8$s;%10$s;0;%12$s in=%4$s;%9$s;%11$s;0;%12$s',
  __adjust($tx), $tx, __adjust($rx), $rx,                       # 1, 2, 3, 4
  __adjust($sent_bits),                                         # 5
  __adjust($received_bits),                                     # 6
  $elapsed,                                                     # 7
  __convert_limit($limits{'warning'}{'tx'}),                    # 8
  __convert_limit($limits{'warning'}{'rx'}),                    # 9
  __convert_limit($limits{'critical'}{'tx'}),                   # 10
  __convert_limit($limits{'critical'}{'rx'}),                   # 11
  $recount{'ifSpeed'}                                           # 12
);

# process each level in order of severity, testing the tx and rx values
#  against their limits
foreach my $key (@level_order) {
  # the reporting sub-routine prints the report and exits, so nothing
  #  after the first level to trigger is run
  $levels{$key}->($report)
    if (__test($limits{$key}{'tx'}, $tx) or __test($limits{$key}{'rx'}, $rx));
}

# if we've reached this stage, no limits have been passed and so
#  it's safe to report that everything is OK. Return information and exit
#  with OK status value.
print 'OK '.$report;
exit($_status{'OK'});

# -----------------------------------------------------------------------------

# get the details of the interface from the SNMP agent
#
#  $name   - the name or the index of the interface wanted
#  returns - the matching interface object from Net::SNMP::Interfaces
#
# Does not return on failure: exits via __critical() when the agent cannot be
#  queried, or via __unknown() when no interface matches $name.
sub __get_interface {
  my ($name) = @_;
  my $interfaces;

  # begin by trying to establish a connection with the SNMP agent on the
  #  device under the community given, and retrieve the interface information
  eval {
    # setup timeout override on SNMP request
    local $SIG{ALRM} = sub { die "SNMP_Alarm\n" };
    alarm($_options{'timeout'});
    # run the request for interface details via the Net::SNMP library
    $interfaces = Net::SNMP::Interfaces->new(
      Hostname  => $_options{'hostname'},
      Community => $_options{'community'},
      Port      => $_options{'port'}
    );
    # if we've reached this point the last command didn't timeout, so reset the
    #  alarm so it doesn't throw elsewhere in the code.
    alarm(0);
  };
  # keep the error before anything else can overwrite $@, and make sure no
  #  alarm is left pending when the request died for some other reason
  my $error = $@;
  alarm(0);

  # without the interface list we cannot continue, whether the request timed
  #  out, died, or simply returned nothing
  __critical('SNMP agent is not responding or access denied -- please check your settings & try again.|out=0;0;0;0;0 in=0;0;0;0;0')
    if ($error || !defined($interfaces));

  # now go through the interfaces from the SNMP agent to find the one wanted,
  #  which may have been given as either its index or its name
  foreach my $candidate ($interfaces->all_interfaces()) {
    return $candidate
      if ($candidate->index eq $name || $candidate->name eq $name);
  }

  # having run over all the interfaces, we couldn't find the one requested and
  #  so cannot continue - send an UNKNOWN response back to Nagios(r)
  __unknown('Interface '.$name.' does not exist on '.$_options{'hostname'});
}
 
# check that the interface is up, and report it to Nagios(r) if it is not
#
#  $interface - the interface object to check
#
# Returns normally when ifOperStatus is 1. Any other state exits the program
#  through __warning() when --down-warning is set, or __critical() otherwise.
sub __check_up {
  my ($interface) = @_;

  # ask for the state once so the test and the message agree
  my $state = $interface->ifOperStatus();
  return if (defined($state) && $state == 1);

  my $message = 'Interface '.$interface->name().
    ' is currently down (state: '.(defined($state) ? $state : 'unknown').')';
  if ($_options{'down-warning'}) {
    __warning($message);
  } else {
    __critical($message);
  }
}

# get the data from the interface and arrange it into a useable format for
#  the rest of the program's runtime
#
#  $interface - the interface object to read the counters from
#  returns    - a list of key/value pairs (to be assigned to a hash) with the
#               keys ifTimeStamp, ifSpeed, ifReceived, ifTransmitted and
#               ifDescription
#
# Side effect: when the interface provides the 64-bit (HC) counters, the
#  global $_cycle is raised to the point at which a 64-bit counter cycles.
sub __get_count {
  my ($interface) = @_;

  # the 64-bit counters are used whenever the inbound one has a value
  my $hc_received = $interface->ifHCInOctets();
  my $use_hc      = defined($hc_received);
  # written as a power so it doesn't rely on the width of perl's integers
  $_cycle = 2**64 if ($use_hc);

  my %values = (
    # the time of the data needs to be recorded along with the data itself
    'ifTimeStamp'   => time,
    # if the speed has been overridden at the command line, ignore the value
    #  from the SNMP data returned by the device
    'ifSpeed'       => ($_options{'override'} ?
                        $_options{'override'} : $interface->ifSpeed()),
    # the traffic counters, from the 64-bit or the 32-bit set
    'ifReceived'    => ($use_hc ? $hc_received : $interface->ifInOctets()),
    'ifTransmitted' => ($use_hc ? $interface->ifHCOutOctets() : $interface->ifOutOctets()),
    'ifDescription' => $interface->ifDescr()
  );

  return %values;
}

# format a value for the human-readable part of the report
#
#  $bits   - the value to format, in bits
#  returns - the value to 2 decimal places followed by a scale suffix and
#            'b' (bits) or 'B' (bytes, with --use-bytes), e.g. '1.50Mb'
sub __adjust {
  my ($bits) = @_;

  # get the value we're going to report (and convert it into bytes if
  #  requested by the --use-bytes command-line argument)
  my $value  = ($_options{'use-bytes'} ? $bits/8 : $bits);
  my $suffix = '';

  if ($_options{'use-mega'}) {
    # --use-mega forces everything to multiples of megabyte or megabit
    $value  = $value/(1024*1024);
    $suffix = 'M';
  } else {
    # otherwise keep dividing by 1024 while we still have suffixes available,
    #  until we have a human-readable number (i.e. below 1024). The suffixes
    #  are in ascending order: kilo, mega, giga, tera, peta, exa.
    my @suffixes = qw(K M G T P E);
    while ($value >= 1024 && @suffixes) {
      $value  = $value/1024;
      $suffix = shift @suffixes;
    }
  }

  # return the value, formatted to 2 decimal places and with the correct
  #  terminology for bits and bytes
  return sprintf('%0.2f%s', $value, $suffix.($_options{'use-bytes'} ? 'B' : 'b'));
}

# take the limit and the value and compare to see if we've passed it
#
#  $limit   - the limit, either a plain number or a number with a suffix
#             understood by __convert_limit()
#  $current - the measured value, in bits
#  returns  - true only when the value is above the limit
sub __test {
  my ($limit, $current) = @_;

  # get the value we're going to compare (and convert it into bytes if
  #  requested by the --use-bytes command-line argument)
  my $value = ($_options{'use-bytes'} ? $current/8 : $current);

  # a value equal to the limit has not passed it
  return ($value > __convert_limit($limit));
}

# convert a limit into an absolute number which can be compared
#
#  $limit  - the limit: a plain number, or digits followed by one of the
#            suffixes B, K, M, G, E or P
#  returns - the number multiplied out by its suffix; anything which is not
#            digits plus a known suffix is returned untouched
sub __convert_limit {
  my ($limit) = @_;

  # the power of 1024 each suffix stands for. B multiplies by one; note that
  #  this program has always taken E as 1024^4 and P as 1024^5, which is kept
  #  so that existing limits continue to mean the same thing.
  my %power_of = (
    'B' => 0, 'K' => 1, 'M' => 2, 'G' => 3, 'E' => 4, 'P' => 5
  );

  if (defined($limit) && $limit =~ /^([0-9]+)([BKMGEP])$/) {
    return $1 * (1024 ** $power_of{$2});
  }

  return $limit;
}

# handle standard error message (due to incorrect configuration)
#
#  $message - what is wrong with the command line; nothing is printed when
#             it is empty or missing
#
# Does not return: __usage() prints the short usage text and exits.
sub __issue {
  my ($message) = @_;
  print "ERROR $message\n" if (defined($message) && $message ne '');
  __usage(0);
}

# handle warning error messages
#
#  $message - the text to report; a default is used when empty or missing
#
# Does not return: prints the message and exits with the WARNING status.
sub __warning {
  my ($message) = @_;
  $message = 'No error message given'
    unless (defined($message) && $message ne '');
  print "WARNING $message\n";

  exit($_status{'WARNING'});
}

# handle critical error messages
#
#  $message - the text to report; a default is used when empty or missing
#
# Does not return: prints the message and exits with the CRITICAL status.
sub __critical {
  my ($message) = @_;
  $message = 'No error message given'
    unless (defined($message) && $message ne '');
  print "CRITICAL $message\n";

  exit($_status{'CRITICAL'});
}

# handle unknown error messages
#
#  $message - the text to report; a default is used when empty or missing
#
# Does not return: prints the message and exits with the UNKNOWN status.
sub __unknown {
  my ($message) = @_;
  $message = 'No error message given'
    unless (defined($message) && $message ne '');
  print "UNKNOWN $message\n";

  exit($_status{'UNKNOWN'});
}

# -----------------------------------------------------------------------------
# program usage guidelines
#
#  $mode - 1 to print the full help (banner, usage line and every option);
#          anything else prints the usage line and a pointer to --help
#
# The defaults shown are read from %_options at the time of the call.
# Does not return: always exits with the UNKNOWN status.
sub __usage {
  my ($mode) = @_;
  my $full = ((defined($mode) && $mode == 1) ? 1 : 0);

  # how an on/off setting is shown in the help text
  my $on_off = sub { return ($_[0] ? 'On' : 'Off'); };
  # a limit without a suffix is a percentage, so show it as one
  my $percent = sub { return ($_[0] =~ /^[0-9]+[BKMGEP]$/ ? '' : '%'); };

  # output about and usage (depending on $full above)
  if ($full) {
    print "check_bandwidth v2.0.0rc1\n Copyright 2008 Jonathan Wright <jonathan\@jabwebsolutions.co.uk>\n\n".
      "Poll (via SNMP) a network port and calculate bandwidth usage\n\n";
  }

  print "Usage: check_bandwidth --hostname hostname --interface name\n".
    "         [--community name] [--port port] [--timeout seconds]\n".
    "         [--override bits/sec] [--warning value] [--critical value]\n".
    "         [--check-up [--down-warning]] [--use-bytes] [--use-mega]\n".
    "         [--save location] [--on-the-fly] [--pause seconds]\n";

  if ($full) {
    print "\nOptions:\n\n".
      " -h, --help\n".
      "    Display this help message\n\n".
      " -H, --hostname STRING\n".
      "    IP address or hostname of (remote) device to query\n\n".
      " -i, --interface STRING\n".
      "    Name or number of the interface to be queried\n\n".
      " -C, --community STRING            (default ".$_options{'community'}.")\n".
      "    SNMP community name to use for polling\n\n".
      " -P, --port INTEGER                (default ".$_options{'port'}.")\n".
      "    Port on which to connect to the SNMP agent\n\n".
      " -t, --timeout INTEGER             (default ".$_options{'timeout'}."s)\n".
      "    Set the timeout value for communications with host via SNMP\n\n".
      " -p, --pause INTEGER               (default ".$_options{'pause'}."s)\n".
      "    Set the length of the pause when calculating results on-the-fly\n".
      "    (or between first checks when no cache value exists)\n\n".
      " -o, --override INTEGER            (default ".($_options{'override'}?$_options{'override'}:'Off').")\n".
      "    Override the maximum throughput available on selected port\n\n".
      " -f, --on-the-fly                  (default ".$on_off->($_options{'on-the-fly'}).")\n".
      "    Perform all calculation of bandwidth on-the-fly and don't use store\n\n".
      " -u, --check-up                    (default ".$on_off->($_options{'check-up'}).")\n".
      "    Check the switch port is UP before checking bandwidth usage. If the port\n".
      "    is down generate a CRITICAL response and exit (won't check usage)\n\n".
      " -d, --down-warning                (default ".$on_off->($_options{'down-warning'}).")\n".
      "    Set --check-up to return a WARNING response instead of CRITICAL (will\n".
      "    have no effect until --check-up enabled as well)\n\n".
      " -w, --warning INTEGER[,INTEGER]   (default ".$_options{'warning'}.$percent->($_options{'warning'}).")\n".
      "    Set the %age use of available bandwidth (taken from port speed, or \n".
      "    --override above) at which to trigger a WARNING. Single value is for both\n".
      "    TX & RX - to specify different limits, use TX,RX\n".
      "    (if B,K,M,G,E or P is appended treat as absolute value in bits, or in\n".
      "    bytes when --use-bytes is given)\n\n".
      " -c, --critical INTEGER[,INTEGER]  (default ".$_options{'critical'}.$percent->($_options{'critical'}).")\n".
      "    Value at which to trigger CRITICAL. Same semantics as --warning above\n\n".
      " -b, --use-bytes                   (default ".$on_off->($_options{'use-bytes'}).")\n".
      "    Use bytes instead of bits in all calculations (i.e. Megabytes not Megabits)\n\n".
      " -m, --use-mega                    (default ".$on_off->($_options{'use-mega'}).")\n".
      "    Force use of Megabit/Megabyte in all output (don't use Kilo or Giga)\n\n".
      " -s, --save STRING                 (default '".$_options{'save'}."')\n".
      "    Change the location where the cache files are saved.\n\n";
  } else {
    print "       see --help for further information\n";
  }

  exit($_status{'UNKNOWN'});
}
